
*** census 2000
* Builds the person-level extract from the raw 2000 census file and saves it
* as ../data/census2000individual. Variables created: prov, male, birthy,
* birthm, age, han, hukou, birthplace, birthoutprov, birthprov, edu, eduy,
* edus, chbornm/chbornf/chsurvm/chsurvf, chborn, chsurv.

use ../raw/census2000/census00.dta, clear

rename id hhcode

* province: household code divided by 10^16, truncated to an integer
generate prov = int(hhcode / (10^16))

* gender: 1 when r03 == 1, 0 otherwise, missing when r03 is missing
tab r03, m
generate male = (r03 == 1) if r03 != .

* age
tab r041, m //birth year
tab r042, m //birth month

clonevar birthy = r041
clonevar birthm = r042

* survey year minus birth year, less one for November/December births
* (presumably because the census reference date is 1 November - TODO confirm)
generate int age = 2000 - r041
replace age = age - 1 if inlist(r042, 11, 12)

count if missing(age)

* ethnic indicator: 1 when r05 == 1; codes above 56 (and missing) -> missing
tab r05, m

generate han = (r05 == 1) if r05 <= 56

* hukou, 1:urban, 0:rural (r07 == 2 -> 1; r07 of 0 or missing -> missing)
tab r07, m

generate hukou = (r07 == 2) if r07 != . & r07 != 0

* birth place
tab r081, m

clonevar birthplace = r081

tab r082, m

clonevar birthoutprov = r082

* birthplace codes 1/2 take the current province, code 3 takes the reported
* outside province; resulting values above 65 are set to missing
generate birthprov = prov if inlist(birthplace, 1, 2)
replace birthprov = birthoutprov if birthplace == 3
replace birthprov = . if birthprov > 65


* education years
tab r151, m

clonevar edu = r151
replace edu = . if edu == 0

* levels below 3 -> 0 years; levels 3..9 -> the years listed below, in order
generate eduy = 0 if edu < 3
local lvl = 3
foreach yrs of numlist 6 9 12 13 15 16 19 {
	replace eduy = `yrs' if edu == `lvl'
	local ++lvl
}


* education status
tab r16, m

clonevar edus = r16
replace edus = . if r16 == 0

tab edus, m


* birth history
tab r251, m

* For each raw count: treat missing as zero, copy it to the analysis name,
* then blank it for men and for women younger than 15 or aged 50 and over.
* NOTE(review): the upper age cutoff differs across waves (>= 50 here,
* >= 64 in 2005, > 65 in 2010) - confirm each against the questionnaire.
local rawcounts r251 r252 r253 r254
local newcounts chbornm chbornf chsurvm chsurvf
forvalues k = 1/4 {
	local src : word `k' of `rawcounts'
	local dst : word `k' of `newcounts'
	replace `src' = 0 if `src' == .
	clonevar `dst' = `src'
	replace `dst' = . if male == 1 | (male == 0 & !missing(age) & (age < 15 | age >= 50))
}

* totals over sons and daughters
generate chborn = chbornm + chbornf
generate chsurv = chsurvm + chsurvf


* strip all variable labels
foreach v of varlist _all {
	label variable `v' ""
}

* drop the raw questionnaire variables
drop h0 h02-r265 ra*

compress

save ../data/census2000individual, replace




*** census 2005
* Builds the person-level extract from the raw 2005 file and saves it as
* ../data/census2005individual. Variables created: prov, male, birthy, birthm,
* age, han, ethn, hukou, edu, eduy, chbornm/chbornf/chsurvm/chsurvf, chborn,
* chsurv, chdecem, chdecef, chdece.

use ../raw/census2005/chn2005.dta, clear

* province: dz_code divided by 100, truncated to an integer
generate prov = int(dz_code / (100))
* flag one row per province and count the flags (also sorts the data by prov)
bysort prov: generate byte firstobs = (_n == 1)
count if firstobs // 31 provinces
drop firstobs

* gender: 1 when r3 == 1, 0 otherwise, missing when r3 is missing
tab r3, m

generate male = (r3 == 1) if r3 != .

* age
tab r4_02, m //birth year
tab r4_03, m //birth month

clonevar birthy = r4_02
clonevar birthm = r4_03

* survey year minus birth year, less one for November/December births
generate int age = 2005 - birthy
replace age = age - 1 if inlist(birthm, 11, 12)

count if missing(age)

* ethnic indicator: 1 when r5 == 1; codes above 56 (and missing) -> missing
tab r5, m

generate han = (r5 == 1) if r5 <= 56

clonevar ethn = r5

* hukou: r11 code 1 -> 0, code 2 -> 1, any other nonmissing code -> missing
tab r11, m nol
recode r11 (1 = 0) (2 = 1) (nonm = .), generate(hukou)
tab hukou, m

* education level
tab r17, m nol

clonevar edu = r17

* education years: levels 1..7 -> the years listed below, in order
generate eduy = .
local lvl = 1
foreach yrs of numlist 0 6 9 12 15 16 19 {
	replace eduy = `yrs' if r17 == `lvl'
	local ++lvl
}

* birth history
tab r33_01, m

* For each raw count: treat missing as zero, copy it to the analysis name,
* then blank it for men and for women younger than 15 or aged 64 and over.
* NOTE(review): the upper age cutoff differs across waves (>= 50 in 2000,
* >= 64 here, > 65 in 2010) - confirm each against the questionnaire.
local rawcounts r33_01 r33_02 r34_01 r34_02
local newcounts chbornm chbornf chsurvm chsurvf
forvalues k = 1/4 {
	local src : word `k' of `rawcounts'
	local dst : word `k' of `newcounts'
	replace `src' = 0 if `src' == .
	clonevar `dst' = `src'
	replace `dst' = . if male == 1 | (male == 0 & !missing(age) & (age < 15 | age >= 64))
}

* totals over sons and daughters
generate chborn = chbornm + chbornf
generate chsurv = chsurvm + chsurvf

* children born minus children surviving, by sex and in total
generate chdecem = chbornm - chsurvm
generate chdecef = chbornf - chsurvf
generate chdece = chborn - chsurv


* move prov in front of hhcode, then drop the raw variable range
order prov, before(hhcode)
drop h_id-r35_04

* strip all variable labels
foreach v of varlist _all {
	label variable `v' ""
}
compress

save ../data/census2005individual, replace


*** census 2010
* Builds the person-level extract from the raw 2010 census file and saves it
* as ../data/census2010individual. Variables created: prov, male, birthy,
* birthm, age, han, ethn, hukou, edu, eduy, chbornm/chbornf/chsurvm/chsurvf,
* chborn, chsurv, chdecem, chdecef, chdece.
use ../raw/census2010/census2010.dta, clear

rename h1 hhcode

* province: household code divided by 10^16, truncated to an integer
generate prov = int(hhcode / (10^16))

* gender: 1 when r3 == 1, 0 otherwise, missing when r3 is missing
tab r3, m
generate male = (r3 == 1) if r3 != .

* age
tab r4_1, m //birth year
tab r4_2, m //birth month

clonevar birthy = r4_1
clonevar birthm = r4_2

* survey year minus birth year, less one for November/December births
generate int age = 2010 - r4_1
replace age = age - 1 if inlist(r4_2, 11, 12)

count if missing(age)

* check the constructed age against the age supplied in the raw file
compare age age_raw // the same
drop age_raw

* ethnic indicator: 1 when r5 == 1; codes above 56 (and missing) -> missing
tab r5, m

generate han = (r5 == 1) if r5 <= 56

tab han, m

clonevar ethn = r5

* hukou, 1:urban, 0:rural (r11 == 2 -> 1; missing r11 -> missing)
tab r11, m

generate hukou = (r11 == 2) if r11 != .

* education years
tab r15, m

clonevar edu = r15
replace edu = . if edu == 0

* levels 1..7 -> the years listed below, in order
generate eduy = .
local lvl = 1
foreach yrs of numlist 0 6 9 12 15 16 19 {
	replace eduy = `yrs' if edu == `lvl'
	local ++lvl
}

* birth history

tab r26_1, m

* For each raw count: treat missing as zero, copy it to the analysis name,
* then blank it for men and for women younger than 15 or older than 65.
* NOTE(review): the upper age cutoff differs across waves (>= 50 in 2000,
* >= 64 in 2005, > 65 here, which keeps age 65) - confirm against the
* questionnaire.
local rawcounts r26_1 r26_2 r26_3 r26_4
local newcounts chbornm chbornf chsurvm chsurvf
forvalues k = 1/4 {
	local src : word `k' of `rawcounts'
	local dst : word `k' of `newcounts'
	replace `src' = 0 if `src' == .
	clonevar `dst' = `src'
	replace `dst' = . if male == 1 | (male == 0 & !missing(age) & (age < 15 | age > 65))
}

* totals over sons and daughters
generate chborn = chbornm + chbornf
generate chsurv = chsurvm + chsurvf

* children born minus children surviving, by sex and in total
generate chdecem = chbornm - chsurvm
generate chdecef = chbornf - chsurvf
generate chdece = chborn - chsurv


* strip all variable labels
foreach v of varlist _all {
	label variable `v' ""
}

* drop the raw questionnaire variables
drop h2-r28

compress

save ../data/census2010individual, replace



*** append censuses 1990 - 2010
* Stacks the 2000, 2005, 2010 and 1990 individual files, tagging each row
* with its survey year in `wave', and keeps only the shared analysis variables.
* Note: `edu' keeps each wave's own coding (e.g. code 3 maps to 6 years of
* schooling in 2000 but 9 years in 2005); use `eduy' to compare across waves.
local keepvars "hhcode wave prov male han hukou age birthy birthm eduy edu chborn chbornm chbornf "

use ../data/census2000individual, clear
generate int wave = 2000

* rows added by each append have wave missing; fill in that file's year
append using ../data/census2005individual
replace wave = 2005 if wave >= .

append using ../data/census2010individual
replace wave = 2010 if wave >= .

* trim before the next append to save memory
keep `keepvars'

* the 1990 file is not built in this section; it must already exist on disk
append using ../data/census1990individual
replace wave = 1990 if wave >= .

keep `keepvars'

* replace the raw household code with a compact id, unique within wave
rename hhcode hhcode_raw
egen hhcode = group(hhcode_raw wave)
drop hhcode_raw


*** sample restrictions
* Keeps Han women born 1940-1960 who are observed at least 40 years after
* their birth year and report between 1 and 10 children ever born.

* province
* drop Tibet
drop if prov == 54

tab prov wave

* merge Chongqing (50) with Sichuan (51) and Hainan (46) with Guangdong (44)
recode prov (50 = 51) (46 = 44)

* cohort window: survey year minus birth year of 40+, birth years 1940-1960
keep if wave - birthy >= 40 & inrange(birthy, 1940, 1960)

* Han women only
keep if male == 0 & han == 1
tab hukou

* completed fertility between 1 and 10 (missing chborn is dropped as well)
tab chborn
keep if inrange(chborn, 1, 10)

tab birthy

tab birthy wave


*** fertility at each margin
* Creates nchild0 and nchild as copies of chborn, plus indicators
* nchild1..nchild6 for having at least k children, then writes the final
* analysis file ../data/censuses_ind with hukou renamed to urban.

tab chborn

clonevar nchild0 = chborn
clonevar nchild = chborn

forvalues k = 1/6 {
	* use chborn, which includes non-coresiding children
	generate nchild`k' = (chborn >= `k')
	label variable nchild`k' "N >= `k'"
}

summarize chborn nchild*

compress

* drop rows with unknown hukou status, then expose the variable as `urban'
drop if missing(hukou)
rename hukou urban

save ../data/censuses_ind, replace





